Stat 365: Statistical Communication
Today we will…
ggplot customization
“A comparison between two things, typically on the basis of their structure and for the purpose of explanation or clarification.”
Connect the statistical idea to an idea that the audience likely already understands
Relating the fundamental components of the idea.
Pictures can more easily demonstrate the idea.
Make sure the diagram is thoughtfully created.
Select a simple example, in a context that is relevant to the audience’s expertise or field.
Then build upon it
Avoid statistical jargon and words with double meanings (e.g., confidence, error, etc.)
Use language relevant to the audience’s field
Be concise.
Explain at the level that meets the audience’s needs.
Only if such a definition will serve the audience’s needs
Understand how the audience will be using the technical definition
Use statistical notation carefully
| Statistical Idea | Analogy |
|---|---|
| Type I error | In a jury trial, convicting an innocent person |
| Type II error | In a jury trial, letting a guilty person go free |
| Statistical Idea | Analogy |
|---|---|
| Sample vs Census | Taking a blood sample at your health check-up |
ggplot
pdf
Include a graphic in your Quarto document
library(tidyverse)
library(lubridate)
library(ggthemes)
# Read the raw data from women.csv (path is relative to the working directory).
women_orig <- read_csv("women.csv")

# Reshape to one row per year x kids x education combination.
women_clean <- women_orig |>
  select(
    year, date,
    kids_HS_2534, kids_BAp_2534,
    nokids_HS_2534, nokids_BAp_2534
  ) |>
  # The selected column names have the form "<kids>_<education>_2534":
  # split them on "_", keep the first two pieces, and discard the third (NA).
  pivot_longer(
    cols = !c(year, date),
    names_to = c("kids", "education", NA),
    names_sep = "_",
    values_to = "marriage_rate"
  ) |>
  # Replace each proportion with its complement.
  # NOTE(review): presumably the CSV stores the share NOT married -- confirm
  # against the data dictionary.
  mutate(marriage_rate = 1 - marriage_rate)
head(women_clean)
# A tibble: 6 × 5
year date kids education marriage_rate
<dbl> <date> <chr> <chr> <dbl>
1 1960 1960-01-01 kids HS 0.996
2 1960 1960-01-01 kids BAp 0.998
3 1960 1960-01-01 nokids HS 0.608
4 1960 1960-01-01 nokids BAp 0.426
5 1970 1970-01-01 kids HS 0.982
6 1970 1970-01-01 kids BAp 0.995
# First pass: marriage rate over time, one line per education level and one
# facet panel per value of `kids`.
women_clean |>
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  facet_wrap(~ kids) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, 0.25),
    labels = scales::percent
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break: four-digit years at 1960 and 2000, two-digit
    # years elsewhere. Keep this the same length as `breaks`.
    date_labels = c("%Y", "%y", "%y", "%y", "%Y", "%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  theme(axis.title = element_blank())
annotate + geom_text() + library(ggrepel)
women_clean |>
# Second pass (continues the `women_clean |>` pipe opened on the line above):
# drop the legend and label the lines directly on the plot.
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  # Each label layer gets its own one-row `data` whose `kids` value is "kids",
  # so the text is drawn only in that facet panel.
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "High school or less"),
    x = ymd("1982-01-01"),
    y = 0.6
  ) +
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "College graduates"),
    x = ymd("1995-01-01"),
    y = 1.05
  ) +
  facet_wrap(~ kids) +
  scale_y_continuous(
    # Upper limit is 1.1 so the label placed at y = 1.05 is inside the scale.
    limits = c(0, 1.1),
    breaks = seq(0, 1, 0.25),
    labels = scales::percent
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break; keep this the same length as `breaks`.
    date_labels = c("%Y", "%y", "%y", "%y", "%Y", "%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  theme(
    axis.title = element_blank(),
    legend.position = "none"
  )
women_clean |>
# Third pass (continues the `women_clean |>` pipe opened on the line above):
# same plot, with readable facet strip labels.
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  # Each label layer gets its own one-row `data` whose `kids` value is "kids",
  # so the text is drawn only in that facet panel.
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "High school or less"),
    x = ymd("1982-01-01"),
    y = 0.6
  ) +
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "College graduates"),
    x = ymd("1995-01-01"),
    y = 1.05
  ) +
  # Map the raw `kids` values to the strip text shown above each panel.
  facet_wrap(
    ~ kids,
    labeller = labeller(
      kids = c(kids = "WITH CHILDREN", nokids = "WITHOUT CHILDREN")
    )
  ) +
  scale_y_continuous(
    # Upper limit is 1.1 so the label placed at y = 1.05 is inside the scale.
    limits = c(0, 1.1),
    breaks = seq(0, 1, 0.25),
    labels = scales::percent
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break; keep this the same length as `breaks`.
    date_labels = c("%Y", "%y", "%y", "%y", "%Y", "%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  theme(
    axis.title = element_blank(),
    legend.position = "none"
  )
women_clean |>
# Fourth pass (continues the `women_clean |>` pipe opened on the line above):
# same plot, now styled with ggthemes::theme_fivethirtyeight().
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  # Each label layer gets its own one-row `data` whose `kids` value is "kids",
  # so the text is drawn only in that facet panel.
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "High school or less"),
    x = ymd("1982-01-01"),
    y = 0.6
  ) +
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "College graduates"),
    x = ymd("1995-01-01"),
    y = 1.05
  ) +
  # Map the raw `kids` values to the strip text shown above each panel.
  facet_wrap(
    ~ kids,
    labeller = labeller(
      kids = c(kids = "WITH CHILDREN", nokids = "WITHOUT CHILDREN")
    )
  ) +
  scale_y_continuous(
    # Upper limit is 1.1 so the label placed at y = 1.05 is inside the scale.
    limits = c(0, 1.1),
    breaks = seq(0, 1, 0.25),
    labels = scales::percent
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break; keep this the same length as `breaks`.
    date_labels = c("%Y", "%y", "%y", "%y", "%Y", "%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  # The complete theme goes first; the theme() tweaks after it override it.
  theme_fivethirtyeight() +
  theme(
    axis.title = element_blank(),
    legend.position = "none"
  )
women_clean |>
# Fifth pass (continues the `women_clean |>` pipe opened on the line above):
# same plot, with a title and subtitle.
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  # Each label layer gets its own one-row `data` whose `kids` value is "kids",
  # so the text is drawn only in that facet panel.
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "High school or less"),
    x = ymd("1982-01-01"),
    y = 0.6
  ) +
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "College graduates"),
    x = ymd("1995-01-01"),
    y = 1.05
  ) +
  # Map the raw `kids` values to the strip text shown above each panel.
  facet_wrap(
    ~ kids,
    labeller = labeller(
      kids = c(kids = "WITH CHILDREN", nokids = "WITHOUT CHILDREN")
    )
  ) +
  scale_y_continuous(
    # Upper limit is 1.1 so the label placed at y = 1.05 is inside the scale.
    limits = c(0, 1.1),
    breaks = seq(0, 1, 0.25),
    labels = scales::percent
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break; keep this the same length as `breaks`.
    date_labels = c("%Y", "%y", "%y", "%y", "%Y", "%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  # The complete theme goes first; the theme() tweaks after it override it.
  theme_fivethirtyeight() +
  theme(
    axis.title = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = "Marriage Rates For Women By Education",
    # The plotted series are the `*_2534` columns, i.e. the 25-34 age band.
    subtitle = "Ages 25 to 34"
  )
women_clean |>
# Final pass (continues the `women_clean |>` pipe opened on the line above):
# gray labels with leader lines, hand-written axis labels, bold strip text,
# a caption, and a baseline at y = 0.
  ggplot(aes(x = date, y = marriage_rate)) +
  geom_line(aes(color = education), linewidth = 1) +
  # Every annotation layer gets its own one-row `data` whose `kids` value is
  # "kids", so it is drawn only in that facet panel.
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "High school or less"),
    x = ymd("1982-01-01"),
    y = 0.6,
    color = "gray30"
  ) +
  # Vertical leader line above the "High school or less" label.
  geom_segment(
    data = tibble(kids = "kids"),
    aes(
      x = ymd("1982-01-01"),
      xend = ymd("1982-01-01"),
      y = 0.65,
      yend = 0.94
    )
  ) +
  geom_text(
    data = tibble(kids = "kids"),
    aes(label = "College graduates"),
    x = ymd("1995-01-01"),
    y = 1.05,
    color = "gray30"
  ) +
  # Vertical leader line below the "College graduates" label.
  geom_segment(
    data = tibble(kids = "kids"),
    aes(
      x = ymd("2003-01-01"),
      xend = ymd("2003-01-01"),
      y = 1,
      yend = 0.94
    )
  ) +
  # Map the raw `kids` values to the strip text shown above each panel.
  facet_wrap(
    ~ kids,
    labeller = labeller(
      kids = c(kids = "WITH CHILDREN", nokids = "WITHOUT CHILDREN")
    )
  ) +
  scale_y_continuous(
    # Upper limit is 1.1 so the label placed at y = 1.05 is inside the scale.
    limits = c(0, 1.1),
    breaks = seq(0, 1, 0.25),
    # One label per break; only the top tick carries the percent sign.
    labels = c(0, 25, 50, 75, "100%")
  ) +
  scale_x_date(
    limits = ymd(c("1960-01-01", "2012-01-01")),
    # One tick per decade, 1960 through 2010 (six breaks).
    breaks = seq(ymd("1960-01-01"), ymd("2010-01-01"), by = "10 years"),
    # One format per break: full year at 1960 and 2000, apostrophe plus
    # two-digit year elsewhere. Keep this the same length as `breaks`.
    date_labels = c("%Y", "'%y", "'%y", "'%y", "%Y", "'%y")
  ) +
  scale_color_manual(values = c("#8c6bb1", "#810f7c")) +
  # The complete theme goes first; the theme() tweaks after it override it.
  theme_fivethirtyeight() +
  theme(
    axis.title = element_blank(),
    legend.position = "none",
    strip.text = element_text(face = "bold")
  ) +
  labs(
    title = "Marriage Rates For Women By Education",
    # The plotted series are the `*_2534` columns, i.e. the 25-34 age band.
    subtitle = "Ages 25 to 34",
    caption = "BASED ON CENSUS BUREAU DATA"
  ) +
  # Added last, so the baseline is drawn on top of the earlier layers.
  geom_hline(yintercept = 0)
library(patchwork)
Copy the Masters: First Draft
Draft: Results and Discussion